#coding=utf8
import urllib, BeautifulSoup,os,time

# Read the article list. Each line is a title immediately followed by its
# CNKI detail-page URL, e.g.:
#   Locata系统概况及启示http://www.cnki.net/KCMS/detail/detail.aspx?QueryID=...
with open(os.path.join(os.getcwd(), u"文献列表.txt")) as f:
    p_list = f.readlines()

# Split each line into (title, url) at the first occurrence of "http".
# NOTE(review): a line without "http" yields ('', whole_line) — assumes the
# list file is well-formed; confirm against how it is generated.
p_list = [(p[:p.find("http")], p[p.find("http"):]) for p in p_list]

# Truncate (or create) the failure-list file so each run starts clean.
# Opening in 'w' mode already empties the file; nothing needs to be written.
with open(os.path.join(os.getcwd(), u'失败列表.txt'), 'w') as f:
    pass

for p in p_list:
    print p[0].decode('utf8')
    name=p[0].replace('/','_')

    print u'1 获取网页内容'
    web=urllib.urlopen(p[1])
    soup=BeautifulSoup.BeautifulSoup(web.read())

    print u'2 获取下载地址'
    if soup.find(id="QK_nav"):
        link=soup.find(id="QK_nav")("a")[1]
        s=str(link)
        link="http://www.cnki.net"+s[s.find('/'):s.find('pdfdown\n')+7]
        file_path=os.path.join(os.getcwd(),name.decode('utf8')+u'.pdf')
    else:
        link=soup.find(id="nav")("a")[2]
        s=str(link)
        link="http://www.cnki.net/KCMS"+s[s.find('/down'):s.find('>')-1].replace('&amp;','&')
        file_path=os.path.join(os.getcwd(),name.decode('utf8')+u'.caj')


    print u'3 下载文件'
    urllib.urlretrieve(link,file_path)

    if int(os.path.getsize(file_path))<10000:
        print u'4 文件大小：%.2f KB < 10 KB' % (float(os.path.getsize(file_path))/1000)
        os.remove(file_path)
        print u'5 下载失败\n'
        f=file(os.path.join(os.getcwd(),u'失败列表.txt'),'a')
        f.write(p[0]+p[1])
        f.close()
    else:
        print u'4 文件大小：%.2f KB' % (float(os.path.getsize(file_path))/1000)
        print u'5 下载成功\n'
    time.sleep(1)

# All items processed; keep the console window open until Enter is pressed
# (useful when the script is launched by double-click on Windows).
print u'全部下载完成  按回车退出程序'
raw_input()